import re
import urlparse

# Placeholder substituted for every secret that is removed from text.
REPLACE_STR = '$encrypted$'


class UriCleaner(object):
    """Mask credentials embedded in URIs that appear inside free-form text."""

    REPLACE_STR = REPLACE_STR
    # https://regex101.com/r/sV2dO2/2
    # Assembled from adjacent literals: the raw pieces keep the regex escapes
    # readable, while the u'' piece carries the non-ASCII quote characters.
    # This spelling is accepted by both the Python 2 and the Python 3 parser
    # (a single ur'' literal is a syntax error on Python 3).
    SENSITIVE_URI_PATTERN = re.compile(
        r'(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)'
        r'(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+'
        r'(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?'
        u'\xab\xbb\u201c\u201d\u2018\u2019'
        r']))',
        re.MULTILINE)

    @staticmethod
    def _redact_uri(uri_str):
        """Return ``uri_str`` with its credentials masked, or ``None`` if it has none.

        Masking is positional: only the username and password slots of the
        authority section are rewritten, so a secret that also happens to
        occur in the scheme or inside the placeholder cannot divert the
        replacement.  One further occurrence of the password after the
        credentials is masked as well.

        Raises ``ValueError`` when the URI cannot be parsed, or when an
        authority without ``@`` does not split into exactly
        ``username:password``.
        """
        parts = urlparse.urlsplit(uri_str)
        netloc = parts.netloc
        if parts.username or parts.password:
            # user[:password]@host - the user info is everything before the
            # last '@' of the authority section.
            userinfo, _, hostinfo = netloc.rpartition('@')
            username, colon, password = userinfo.partition(':')
            after_password = '@' + hostinfo
        elif netloc and ':' in netloc:
            # Handle the special case url http://username:password that can
            # appear in SCM url on account of a bug? in ansible redaction.
            # More than one ':' makes the unpacking raise ValueError.
            username, password = netloc.split(':')
            colon = ':'
            after_password = ''
        else:
            return None

        # The authority section sits directly after the first '://'.
        netloc_start = uri_str.find('://') + 3
        netloc_end = netloc_start + len(netloc)
        if uri_str[netloc_start:netloc_end] != netloc:
            raise ValueError('unable to locate the authority section of the URI')

        mask = UriCleaner.REPLACE_STR
        remainder = after_password + uri_str[netloc_end:]
        if password:
            # Also hide the next copy of the password (host, path or query).
            remainder = remainder.replace(password, mask, 1)
            password = mask
        if username:
            username = mask
        return uri_str[:netloc_start] + username + colon + password + remainder

    @staticmethod
    def remove_sensitive(cleartext):
        """Return ``cleartext`` with the credentials of every embedded URI masked.

        Each URI found by ``SENSITIVE_URI_PATTERN`` is inspected in turn:

        * a URI carrying ``user:password@`` (or the bare ``user:password``
          authority form) has those parts replaced by ``REPLACE_STR``;
        * a URI that cannot be parsed or split unambiguously is replaced by
          ``REPLACE_STR`` as a whole, to be safe;
        * a URI without credentials is left untouched.
        """
        redactedtext = cleartext
        text_index = 0
        while True:
            match = UriCleaner.SENSITIVE_URI_PATTERN.search(redactedtext, text_index)
            if not match:
                break
            uri_str = match.group(1)
            try:
                redacted_uri = UriCleaner._redact_uri(uri_str)
            except ValueError:
                # Invalid or ambiguous URI: hide all of it rather than fail.
                redacted_uri = UriCleaner.REPLACE_STR

            if redacted_uri is None:
                # NOTE: the cursor moves forward by the URI's length from its
                # previous position, not to the end of the match, so the next
                # search may start inside this URI and can still pick up a
                # credential-bearing URL embedded in it.
                text_index += len(uri_str)
                continue

            redactedtext = (redactedtext[:match.start()] + redacted_uri +
                            redactedtext[match.end():])
            # Resume right after the rewritten URI.
            text_index = match.start() + len(redacted_uri)

        return redactedtext


class PlainTextCleaner(object):
    """Mask literal occurrences of a known secret inside text."""

    REPLACE_STR = REPLACE_STR

    @staticmethod
    def remove_sensitive(cleartext, sensitive):
        """Return ``cleartext`` with every occurrence of ``sensitive`` masked.

        ``sensitive`` is matched literally (it is escaped before being used
        as a pattern).  An empty or missing secret leaves the text unchanged.
        """
        if not sensitive:
            return cleartext
        mask = PlainTextCleaner.REPLACE_STR
        # A callable replacement inserts the mask verbatim, without the
        # backslash processing re.sub applies to replacement strings.
        return re.sub(re.escape(sensitive), lambda _match: mask, cleartext)
